{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'mesolitica-storage.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.cloud import storage\n",
    "client = storage.Client()\n",
    "bucket = client.bucket('mesolitica-general')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "!rm -rf t5-base-bahasa\n",
    "!mkdir t5-base-bahasa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "blob = bucket.blob('t5-base/model.ckpt-487600.data-00000-of-00002')\n",
    "blob.download_to_filename('t5-base-bahasa/model.ckpt-487600.data-00000-of-00002')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "blob = bucket.blob('t5-base/model.ckpt-487600.data-00001-of-00002')\n",
    "blob.download_to_filename('t5-base-bahasa/model.ckpt-487600.data-00001-of-00002')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "blob = bucket.blob('t5-base/model.ckpt-487600.index')\n",
    "blob.download_to_filename('t5-base-bahasa/model.ckpt-487600.index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "blob = bucket.blob('t5-base/model.ckpt-487600.meta')\n",
    "blob.download_to_filename('t5-base-bahasa/model.ckpt-487600.meta')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "blob = bucket.blob('t5-base/checkpoint')\n",
    "blob.download_to_filename('t5-base-bahasa/checkpoint')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "blob = bucket.blob('t5-base/operative_config.gin')\n",
    "blob.download_to_filename('t5-base-bahasa/operative_config.gin')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "blob = bucket.blob('t5-base/events.out.tfevents.1589423125.general')\n",
    "blob.download_to_filename('t5-base-bahasa/events.out.tfevents.1589423125.general')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "!cp sp10m.cased.t5* t5-base-bahasa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip3 install transformers -U\n",
    "from transformers import T5Config, T5Model, load_tf_weights_in_t5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "out = 't5-base-bahasa-cased'\n",
    "os.makedirs(out, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "T5Config {\n",
      "  \"d_ff\": 3072,\n",
      "  \"d_kv\": 64,\n",
      "  \"d_model\": 768,\n",
      "  \"decoder_start_token_id\": 0,\n",
      "  \"dropout_rate\": 0.1,\n",
      "  \"eos_token_id\": 1,\n",
      "  \"initializer_factor\": 1.0,\n",
      "  \"inputs_length\": 1024,\n",
      "  \"is_encoder_decoder\": true,\n",
      "  \"layer_norm_epsilon\": 1e-06,\n",
      "  \"model_type\": \"t5\",\n",
      "  \"n_positions\": 1024,\n",
      "  \"num_heads\": 12,\n",
      "  \"num_layers\": 12,\n",
      "  \"pad_token_id\": 0,\n",
      "  \"relative_attention_num_buckets\": 32,\n",
      "  \"vocab_size\": 32128\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "config = T5Config(\n",
    "    vocab_size = 32128,\n",
    "    n_positions=1024,\n",
    "    d_ff = 3072,\n",
    "    d_kv = 64,\n",
    "    d_model = 768,\n",
    "    dropout_rate = 0.1,\n",
    "    inputs_length = 1024,\n",
    "    num_heads = 12,\n",
    "    num_layers = 12,\n",
    "    decoder_start_token_id = 0,\n",
    "    eos_token_id = 1,\n",
    "    pad_token_id = 0)\n",
    "print(config)\n",
    "config.save_pretrained(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "T5Model(\n",
       "  (shared): Embedding(32128, 768)\n",
       "  (encoder): T5Stack(\n",
       "    (embed_tokens): Embedding(32128, 768)\n",
       "    (block): ModuleList(\n",
       "      (0): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (relative_attention_bias): Embedding(32, 12)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (1): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (2): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (3): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (4): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (5): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (6): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (7): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (8): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (9): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (10): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (11): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "    (final_layer_norm): T5LayerNorm()\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       "  (decoder): T5Stack(\n",
       "    (embed_tokens): Embedding(32128, 768)\n",
       "    (block): ModuleList(\n",
       "      (0): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (relative_attention_bias): Embedding(32, 12)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (relative_attention_bias): Embedding(32, 12)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (1): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (2): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (3): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (4): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (5): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (6): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (7): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (8): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (9): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (10): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (11): T5Block(\n",
       "        (layer): ModuleList(\n",
       "          (0): T5LayerSelfAttention(\n",
       "            (SelfAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (1): T5LayerCrossAttention(\n",
       "            (EncDecAttention): T5Attention(\n",
       "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
       "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (2): T5LayerFF(\n",
       "            (DenseReluDense): T5DenseReluDense(\n",
       "              (wi): Linear(in_features=768, out_features=3072, bias=False)\n",
       "              (wo): Linear(in_features=3072, out_features=768, bias=False)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (layer_norm): T5LayerNorm()\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "    (final_layer_norm): T5LayerNorm()\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = T5Model(config)\n",
    "load_tf_weights_in_t5(model, config, 't5-base-bahasa/model.ckpt-487600')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('config.json', 'pytorch_model.bin')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers import CONFIG_NAME, WEIGHTS_NAME\n",
    "CONFIG_NAME, WEIGHTS_NAME"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "torch.save(model.state_dict(), out + '/' + WEIGHTS_NAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import T5Config, T5Model, T5Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('t5-base-bahasa-cased/spiece.model',\n",
       " 't5-base-bahasa-cased/special_tokens_map.json',\n",
       " 't5-base-bahasa-cased/added_tokens.json')"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer = T5Tokenizer('sp10m.cased.t5.model')\n",
    "tokenizer.save_pretrained(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = T5Tokenizer.from_pretrained('./t5-base-bahasa-cased', lower = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "config = T5Config.from_pretrained('./t5-base-bahasa-cased')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = T5Model.from_pretrained('./t5-base-bahasa-cased/pytorch_model.bin', config = config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save_pretrained(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !transformers-cli upload ./t5-base-bahasa-cased"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import T5Tokenizer, T5ForConditionalGeneration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c968352236ff43d2bf902f786b3a6dd3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891688674.0, style=ProgressStyle(descri…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Mahathir Mohamad'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer = T5Tokenizer.from_pretrained('huseinzol05/t5-base-bahasa-cased')\n",
    "model = T5ForConditionalGeneration.from_pretrained('huseinzol05/t5-base-bahasa-cased')\n",
    "input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')\n",
    "outputs = model.generate(input_ids)\n",
    "tokenizer.decode(outputs[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Denali'"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_ids = tokenizer.encode('soalan: gunung apa paling tinggi?', return_tensors = 'pt')\n",
    "outputs = model.generate(input_ids)\n",
    "tokenizer.decode(outputs[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "t5-base-bahasa/\n",
      "t5-base-bahasa/sp10m.cased.t5.model\n",
      "t5-base-bahasa/model.ckpt-487600.data-00001-of-00002\n",
      "t5-base-bahasa/model.ckpt-487600.meta\n",
      "t5-base-bahasa/model.ckpt-487600.data-00000-of-00002\n",
      "t5-base-bahasa/model.ckpt-487600.index\n",
      "t5-base-bahasa/checkpoint\n",
      "t5-base-bahasa/sp10m.cased.t5.vocab\n",
      "t5-base-bahasa/events.out.tfevents.1589423125.general\n",
      "t5-base-bahasa/operative_config.gin\n"
     ]
    }
   ],
   "source": [
    "!tar -czvf t5-base-bahasa.gz t5-base-bahasa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "\n",
    "bucketName = 'huseinhouse-storage'\n",
    "Key = 't5-base-bahasa.gz'\n",
    "outPutname = \"bert-bahasa/t5-base-05-20-2020.tar.gz\"\n",
    "\n",
    "s3 = boto3.client('s3')\n",
    "s3.upload_file(Key,bucketName,outPutname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
